package util;

import us.codecraft.webmagic.Page;
    import us.codecraft.webmagic.Site;
    import us.codecraft.webmagic.Spider;
import downloader.RandomHeaderDownloader;
 import us.codecraft.webmagic.processor.PageProcessor;
    import us.codecraft.webmagic.proxy.Proxy;
    import us.codecraft.webmagic.proxy.SimpleProxyProvider;

    /**
     * Demo crawler that downloads a single page through an authenticated proxy.
     *
     * <p>Based on WebMagic 0.7.3; see the official WebMagic documentation for the
     * detailed proxy settings.
     *
     * <p>The proxy credentials may be overridden with the {@code PROXY_USER} and
     * {@code PROXY_PASS} environment variables, and the target URL with the first
     * command-line argument. When none of these are supplied the built-in demo
     * values are used.
     **/
    public class WebMagicProxyDemo implements PageProcessor {

        /** Proxy server host. */
        private static final String PROXY_HOST = "http-dyn.abuyun.com";

        /** Proxy server port. */
        private static final int PROXY_PORT = 9020;

        // Proxy tunnel authentication defaults.
        // NOTE(review): these credentials are committed in source; prefer supplying
        // them through the environment variables and rotating the committed values.
        private static final String DEFAULT_PROXY_USER = "H189I8A0F5BCNE4D";
        private static final String DEFAULT_PROXY_PASS = "9D34D77FD49D3FDB";

        /** URL crawled when no command-line argument is given. */
        private static final String DEFAULT_TARGET_URL = "https://www.baidu.com/";

        /** Crawl settings: 3 retries and a sleep time of 100 between requests. */
        private final Site site = Site.me().setRetryTimes(3).setSleepTime(100);

        /**
         * Stores the HTML of the downloaded page in the result under the key {@code "html"}.
         *
         * @param page the downloaded page
         */
        @Override
        public void process(Page page) {
            page.putField("html", page.getHtml());
        }

        /**
         * Returns the crawl settings used by this processor.
         *
         * @return the site configuration
         */
        @Override
        public Site getSite() {
            return site;
        }

        /**
         * Returns the value of an environment variable, or a fallback when it is unset or empty.
         *
         * @param name     environment variable name
         * @param fallback value returned when the variable is missing or empty
         * @return the environment value, or {@code fallback}
         */
        private static String envOrDefault(String name, String fallback) {
            final String value = System.getenv(name);
            return (value == null || value.isEmpty()) ? fallback : value;
        }

        /**
         * Runs the demo: configures the proxy on a {@link RandomHeaderDownloader} and
         * starts a spider with 5 threads against the target URL.
         *
         * @param args optional; {@code args[0]} overrides the default target URL
         */
        public static void main(String[] args) {
            // Proxy tunnel authentication, overridable from the environment.
            final String proxyUser = envOrDefault("PROXY_USER", DEFAULT_PROXY_USER);
            final String proxyPass = envOrDefault("PROXY_PASS", DEFAULT_PROXY_PASS);

            final String targetUrl = (args != null && args.length > 0) ? args[0] : DEFAULT_TARGET_URL;

            final Proxy proxy = new Proxy(PROXY_HOST, PROXY_PORT, proxyUser, proxyPass);

            RandomHeaderDownloader randomHeaderDownloader = new RandomHeaderDownloader();
            randomHeaderDownloader.setProxyProvider(SimpleProxyProvider.from(proxy));

            Spider.create(new WebMagicProxyDemo())
                    .setDownloader(randomHeaderDownloader)
                    .addUrl(targetUrl)
                    .thread(5)
                    .run();
        }
    }